*==============================================================================*
*       Effects of Tier 1 Exhaust Standards on Used Vehicle Emissions          *
*==============================================================================*

u "dataSTATA/combined/combined_smogcheck_colorado.dta", clear

sampleSelectionDD

keep if model_year >= 1982 & model_year <= 2000

g   standard_CO2 = standard_CO

g cmodel_year = model_year - 1993
g ldtXmodel_year = ldt * model_year

destring *std, replace
ren v_co_std  standardSmogcheck_CO
ren v_hc_std  standardSmogcheck_HC
ren v_nox_std standardSmogcheck_NOX
g lnstandardSmogcheck_CO  = ln(standardSmogcheck_CO)
g lnstandardSmogcheck_HC  = ln(standardSmogcheck_HC)
g lnstandardSmogcheck_NOX = ln(standardSmogcheck_NOX)


g lncafe_standard = ln(cafe_standard)
g lngasCostPerMile = ln(gasCostPerMile)
* replacing missing gas costs (i.e., missing MPG since not in vin decoder?)
* with mean gas cost by model year*ldt, so it doesn't drop 2.5% of obs
egen meanGasCost = mean(gasCostPerMile), by(model_year ldt)
replace lngasCostPerMile = ln(meanGasCost) if lngasCostPerMile >= .
g lnso2_per_gallon = ln(so2_per_gallon)

g lnsh_ethanol     = ln(sh_ethanol)
g lnsh_ethanolXldt = lnsh_ethanol * ldt


* it's tricky to output xmean (estadd isn't working) so manually put them 
* in a matrix
loc col = 1

g years9000 = model_year >= 1990 & model_year <= 2000

foreach p in CO HC {
	* so log and level use same sample, this affects a third of a percent of obs
	* that may be incorrect anyways.
	replace emissions_used_`p' = . if emissions_used_`p' == 0
	
	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2, cluster(id_cluster) absorb(age ldt)
		est store m1_`p'

	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2, cluster(id_cluster) absorb(model_year age ldt)
		est store m2_`p'

	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2 lncafe_standard lnstandardSmogcheck_`p' lngasCostPerMile lnsh_ethanol lnso2_per_gallon, cluster(id_cluster) absorb(model_year age ldt)
		est store m3_`p'

	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2 ldtXmodel_year, cluster(id_cluster) absorb(model_year age ldt)
		est store m4_`p'

	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2 if age >= 4 & age <= 6, cluster(id_cluster) absorb(model_year age ldt)
		est store m5_`p'

	reghdfe  lnemissions_used_`p' lnstandard_`p' odometer odometer_2 if years9000 == 1, cluster(id_cluster) absorb(model_year age ldt)
		est store m6_`p'

	reghdfe  emissions_used_`p' standard_`p' odometer odometer_2, cluster(id_cluster) absorb(model_year age ldt)
		est store m7_`p'

loc col = `col' + 7
}

g n = _n
reshape long emissions_used_ lnemissions_used_ standard_ lnstandard_ lnstandardSmogcheck_, i(n) j(pollutant) string
ren emissions_used_   emissions_used
ren lnemissions_used_ lnemissions_used
ren standard_    standard
ren lnstandard_  lnstandard
ren lnstandardSmogcheck_ lnstandardSmogcheck

drop if pollutant == "MPG"

* this is needed so the regression doesn't drop CO2 observations, which don't have a smogcheck standard
replace lnstandardSmogcheck = lnstandard if pollutant == "CO2"

egen model_yearXpollutant = group(model_year pollutant)
egen ldtXpollutant = group(ldt pollutant)
egen ageXpollutant = group(age pollutant)

keep if inlist(pollutant, "CO", "HC") & lnemissions_used < .

***** estimates pooling all pollutants
reghdfe  lnemissions_used lnstandard odometer odometer_2 , cluster(id_cluster) absorb(ldtXpollutant ageXpollutant)
	est store m1_all

reghdfe  lnemissions_used lnstandard odometer odometer_2 , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m2_all

reghdfe  lnemissions_used lnstandard odometer odometer_2 lncafe_standard lnstandardSmogcheck lngasCostPerMile lnsh_ethanol lnso2_per_gallon , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m3_all

reghdfe  lnemissions_used lnstandard odometer odometer_2 ldtXmodel_year , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m4_all

reghdfe  lnemissions_used lnstandard odometer odometer_2  if age >= 4 & age <= 6, cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m5_all

reghdfe  lnemissions_used lnstandard odometer odometer_2 if years9000 == 1, cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m6_all

reghdfe  emissions_used standard odometer odometer_2, cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant ageXpollutant)
	est store m7_all



*==============================================================================*
*                         new vehicle emissions tests                          *
*==============================================================================*

u "replicationFiles/dataSTATA/combined/combined_newcars.dta", clear

* non-missing used vehicle emissions tests for all pollutants including CO2
keep if emissions_new_CO < . & emissions_new_HC < . & emissions_new_NOX < . 

*-------------------------- define additional variables -----------------------*
foreach v of varlist emissions_new_* standard_CO standard_HC standard_NOX {
	g ln`v' = ln(`v')
}

keep if model_year >= 1982 & model_year <= 2000


g cmodel_year = model_year - 1993
g ldtXmodel_year = ldt * model_year

g lncafe_standard = ln(cafe_standard)
* replacing missing gas costs (i.e., missing MPG since not in vin decoder?)
* with mean gas cost by model year*ldt, so it doesn't drop 2.5% of obs


* it's tricky to output xmean (estadd isn't working) so manually put them 
* in a matrix
loc col = 1

g years9000 = model_year >= 1990 & model_year <= 2000

foreach p in CO HC {
	* so log and level use same sample, this affects a third of a percent of obs
	* that may be incorrect anyways.
	replace emissions_new_`p' = . if emissions_new_`p' == 0
	
	reghdfe  lnemissions_new_`p' lnstandard_`p' , cluster(id_cluster) absorb(ldt)
		est store n1_`p'

	reghdfe  lnemissions_new_`p' lnstandard_`p' , cluster(id_cluster) absorb(model_year ldt)
		est store n2_`p'

	reghdfe  lnemissions_new_`p' lnstandard_`p' lncafe_standard , cluster(id_cluster) absorb(model_year ldt)
		est store n3_`p'

	reghdfe  lnemissions_new_`p' lnstandard_`p' ldtXmodel_year , cluster(id_cluster) absorb(model_year ldt)
		est store n4_`p'

	reghdfe  lnemissions_new_`p' lnstandard_`p' , cluster(id_cluster) absorb(model_year ldt)
		est store n5_`p'

	reghdfe  lnemissions_new_`p' lnstandard_`p' if years9000 == 1, cluster(id_cluster) absorb(model_year ldt)
		est store n6_`p'

	reghdfe  emissions_new_`p' standard_`p', cluster(id_cluster) absorb(model_year ldt)
		est store n7_`p'

loc col = `col' + 7
}

g n = _n
reshape long emissions_new_ lnemissions_new_ standard_ lnstandard_, i(n) j(pollutant) string
ren emissions_new_   emissions_new
ren lnemissions_new_ lnemissions_new
ren standard_    standard
ren lnstandard_  lnstandard

drop if pollutant == "MPG"

* this is needed so the regression doesn't drop CO2 observations, which don't have a smogcheck standard
egen model_yearXpollutant = group(model_year pollutant)
egen ldtXpollutant = group(ldt pollutant)

keep if inlist(pollutant, "CO", "HC") & lnemissions_new < .

***** estimates pooling all pollutants
reghdfe  lnemissions_new lnstandard , cluster(id_cluster) absorb(ldtXpollutant )
	est store n1_all

reghdfe  lnemissions_new lnstandard , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant)
	est store n2_all

reghdfe  lnemissions_new lnstandard lncafe_standard , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant)
	est store n3_all

reghdfe  lnemissions_new lnstandard ldtXmodel_year , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant )
	est store n4_all

reghdfe  lnemissions_new lnstandard , cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant)
	est store n5_all

reghdfe  lnemissions_new lnstandard if years9000 == 1, cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant)
	est store n6_all

reghdfe  emissions_new standard, cluster(id_cluster) absorb(model_yearXpollutant ldtXpollutant)
	est store n7_all


estout m* n* ///
using "results/tables/t3_tier1.txt", ///
	cells( b(star fmt(%9.2f)) se(par(`"="("'`")""'))) ///
	title("") style(tab) keep(*standard*) ///
	stats(N N_clust, fmt(0 0 3 3) labels(N "Clusters")) label collabels(, none) ///
	starlevels(* .10 ** .05 *** .01) replace

	
	
	